Data

Parameters

suffix = "01_22"
data_to_read = ""

functions

source_from_github(repositoy = "DEG_functions",version = "0.2.24")
ℹ SHA-1 hash of file is a183df1c565702ecd8ed338bb2abfb0e13415d8e
source_from_github(repositoy = "HMSC_functions",version = "0.1.02",script_name = "functions.R")
ℹ SHA-1 hash of file is 53b38a31ba24251739c046cc18406d5b3ec0f896

Enrichment analysis HMSC vs ACC

patient.ident = all_acc_cancer_cells$patient.ident %>% as.data.frame()
patient.ident[,1] = as.character(patient.ident[,1])
patient.ident[patient.ident[,1] == "ACC1",] = "HMSC"
patient.ident[,1] = as.factor(patient.ident[,1])
all_acc_cancer_cells = AddMetaData(object = all_acc_cancer_cells,metadata = patient.ident,col.name = "patient.ident")
all_acc_cancer_cells = SetIdent(all_acc_cancer_cells, value ="patient.ident")
acc_deg <- FindMarkers(all_acc_cancer_cells, ident.1 = "HMSC",logfc.threshold = 1.5,features = VariableFeatures(all_acc_cancer_cells))
enrichment_analysis(acc_deg,background = VariableFeatures(all_acc_cancer_cells),fdr_Cutoff = 0.01,ident.1 = "HMSC",ident.2 = "ACC",show_by = 1)

cell cycle filtering

conflict_prefer("intersect", "base")
[conflicted] Will prefer base::intersect over any other package.

Before cc filtering

FeaturePlot(object = all_acc_cancer_cells,features = hallmark_name) + ggtitle("Before cc filtering")  & scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.

After cc filtering

FeaturePlot(object = all_acc_cancer_cells_ccFiltered,features = hallmark_name) + ggtitle("After cc filtering") & scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.

Enrichment analysis filtered HMSC vs ACC

patient.ident = all_acc_cancer_cells_ccFiltered$patient.ident %>% as.data.frame()
patient.ident[,1] = as.character(patient.ident[,1])
patient.ident[patient.ident[,1] == "ACC1",] = "HMSC"
patient.ident[,1] = as.factor(patient.ident[,1])
all_acc_cancer_cells_ccFiltered = AddMetaData(object = all_acc_cancer_cells_ccFiltered,metadata = patient.ident,col.name = "patient.ident")
all_acc_cancer_cells_ccFiltered = SetIdent(all_acc_cancer_cells_ccFiltered, value ="patient.ident")
acc_deg <- FindMarkers(all_acc_cancer_cells_ccFiltered, ident.1 = "HMSC",logfc.threshold = 1.5,features = VariableFeatures(all_acc_cancer_cells_ccFiltered))
enrichment_analysis(acc_deg,background = VariableFeatures(all_acc_cancer_cells_ccFiltered),fdr_Cutoff = 0.01,ident.1 = "HMSC",ident.2 = "ACC",show_by = 1)

MYB expression

all_acc_cancer_cells = SetIdent(object = all_acc_cancer_cells,value = "patient.ident") #active snn graph
FeaturePlot(object = all_acc_cancer_cells,features = "MYB",label = T)

CNV

new.cluster.ids <- c("cancer", #0
                     "cancer", #1
                     "CAF", #2
                     "cancer", #3
                     "Endothelial", #4
                     "cancer", #5
                     "cancer", #6
                     "CAF", #7
                     "CAF", #8
                     "CAF", #9
                     "cancer", #10
                     "CAF", #11
                     "cancer", #12
                     "cancer", #13
                     "cancer", #14
                     "cancer", #15
                     "cancer", #16
                     "WBC", #17
                     "CAF" #18
                     )
#rename idents:
acc_all_cells = SetIdent(object = acc_all_cells,value = "RNA_snn_res.1") #active snn graph
names(new.cluster.ids) <- levels(acc_all_cells) #add snn graph levels to new.cluster.ids
acc_all_cells@meta.data[["seurat_clusters"]] = acc_all_cells@meta.data[["RNA_snn_res.1"]]
acc_all_cells = SetIdent(object = acc_all_cells,value = "seurat_clusters")
acc_all_cells <- RenameIdents(acc_all_cells, new.cluster.ids) 

# divide "cancer" into patients:
cell_types = acc_all_cells@active.ident %>% as.data.frame()
cell_types[,1]<- as.character(cell_types[,1])
cell_types = cbind(cell_types,acc_all_cells$patient.ident) %>% setnames(old = names(.), 
         new = c('cell_type','patient'))
cell_types[cell_types$cell_type == "cancer",] = cell_types[cell_types$cell_type == "cancer",2]


# hmsc_rows = (startsWith(x = rownames(cell_types),prefix = "ACC.plate2") | startsWith(x = rownames(cell_types),prefix = "ACC1.")) & cell_types[,1] == "cancer" 
# acc_rows = !(startsWith(x = rownames(cell_types),prefix = "ACC.plate2") | startsWith(x = rownames(cell_types),prefix = "ACC1.")) & cell_types[,1] == "cancer" 
# cell_types[,1][hmsc_rows]  = "HMSC"
# cell_types[,1][acc_rows]  = "ACC"

#add to metadata:
cell_types[,2] = NULL 
cell_types[cell_types$cell_type == "ACC1",] = "HMSC"
acc_all_cells = AddMetaData(object =acc_all_cells ,metadata = cell_types,col.name = "cell.type")

CNV UMAP

CNV plot

CNV plot ## {-}

Original score

original_myo_genes = c( "TP63", "TP73", "CAV1", "CDH3", "KRT5", "KRT14", "ACTA2", "TAGLN", "MYLK", "DKK3")
original_lum_genes = c("KIT", "EHF", "ELF5", "KRT7", "CLDN3", "CLDN4", "CD24", "LGALS3", "LCN2", "SLPI" )
myoscore=apply(all_acc_cancer_cells@assays[["RNA"]][original_myo_genes,],2,mean)
lescore=apply(all_acc_cancer_cells@assays[["RNA"]][original_lum_genes,],2,mean)
all_acc_cancer_cells=AddMetaData(all_acc_cancer_cells,lescore-myoscore,"luminal_over_myo")
FeaturePlot(object = all_acc_cancer_cells,features = "luminal_over_myo")

data = FetchData(object = all_acc_cancer_cells,vars = "luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=luminal_over_myo)) + 
  geom_density() 
)

only_acc_cancer_cells = subset(x = all_acc_cancer_cells,patient.ident !="HMSC")
myo_cells_num = subset(x = only_acc_cancer_cells,luminal_over_myo >1) %>% ncol() /ncol(all_acc_cancer_cells)
lum_cells_num = subset(x = only_acc_cancer_cells,luminal_over_myo <(-1)) %>% ncol()/ncol(all_acc_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")

myoscore=apply(acc1_cancer_cells@assays[["RNA"]][original_myo_genes,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]][original_lum_genes,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "luminal_over_myo")

data = FetchData(object = acc1_cancer_cells,vars = "luminal_over_myo")
print(
    data %>% 
    ggplot(aes( x=luminal_over_myo)) + 
    geom_density() 
  )

myo_cells_num = subset(x = acc1_cancer_cells,luminal_over_myo >0) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,luminal_over_myo <(0)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")

0.35 Most correlated score

Myo genes

myo_protein_markers = c("CNN1", "TP63","ACTA2")
# undebug(top_correlated)
top_myo  = top_correlated(dataset = acc1_cancer_cells, genes = myo_protein_markers,threshold = 0.35)
print("Number of genes = " %>% paste(length(top_myo)))
[1] "Number of genes =  20"
message("Names of genes:")
Names of genes:
top_myo %>% head(30)
 [1] "COL16A1"     "RP1-39G22.4" "ACTG2"       "CD200"       "MYLK"        "TP63"        "KCNMB1"      "ADAMTS2"     "LOXL2"      
[10] "TPM2"        "CLIC3"       "SNCG"        "ACTA2"       "TAGLN"       "A2M"         "NGFR"        "CNN1"        "PPP1R14A"   
[19] "MYL9"        "POM121L9P"  
message("Genes that also apeared in the original score:")
Genes that also apeared in the original score:
base::intersect(top_myo,original_myo_genes) 
[1] "MYLK"  "TP63"  "ACTA2" "TAGLN"
myo_enrich_res = genes_vec_enrichment(genes = top_myo,background = rownames(acc1_cancer_cells),homer = T,title = "myo top enrichment",custom_pathways = luminal_gs)

myo_enrich_res

Lum genes

lum_protein_markers = c("KIT")
top_lum  = top_correlated(dataset = acc1_cancer_cells, genes = lum_protein_markers,threshold = 0.35)
Warning in cor(expression %>% t(), markers_average) :
  the standard deviation is zero
print("Number of genes = " %>% paste(length(top_lum)))
[1] "Number of genes =  22"
message("Names of genes:")
Names of genes:
top_lum %>% head(30)
 [1] "FBXO44"        "USP48"         "MPC2"          "SLC19A2"       "ATL2"          "B3GNT2"        "AC093162.5"   
 [8] "MAP2"          "KIT"           "GLRB"          "EFNA5"         "PCDHGB7"       "SLC29A1"       "SASH1"        
[15] "ALDH3B2"       "CCND1"         "RP11-254B13.1" "VSIG10"        "NDFIP2"        "SUSD6"         "SPPL2A"       
[22] "DSC2"         
message("Genes that also apeared in the original score:")
Genes that also apeared in the original score:
base::intersect(top_lum,original_lum_genes) 
[1] "KIT"
lum_enrich_res = genes_vec_enrichment(genes = top_lum,background = rownames(acc1_cancer_cells),homer = T,title = "lum top enrichment",custom_pathways = luminal_gs)

lum_enrich_res

top correlated score

myoscore=apply(acc1_cancer_cells@assays[["RNA"]][top_myo,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]][top_lum,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"top_cor_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "top_cor_luminal_over_myo")


data = FetchData(object = acc1_cancer_cells,vars = "top_cor_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=top_cor_luminal_over_myo)) + 
  geom_density() 
)

  myo_cells_num = subset(x = acc1_cancer_cells,top_cor_luminal_over_myo >0) %>% ncol() /ncol(all_acc_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,top_cor_luminal_over_myo <(0)) %>% ncol()/ncol(all_acc_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")

enriched genes score

rownames(lum_enrich_res) = lum_enrich_res$pathway_name
lum_enriched_genes = lum_enrich_res[1,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,lum_protein_markers) #add original markers
rownames(myo_enrich_res) = myo_enrich_res$pathway_name
myo_enriched_genes = myo_enrich_res[1,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,myo_protein_markers) #add original markers
myoscore=apply(acc1_cancer_cells@assays[["RNA"]]@data[myo_enriched_genes,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]]@data[lum_enriched_genes,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"enriched_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "enriched_luminal_over_myo")


data = FetchData(object = acc1_cancer_cells,vars = "enriched_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=enriched_luminal_over_myo)) + 
  geom_density() 
)


myo_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo >(-2.5)) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo <(-2.5)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")

0.2 Most correlated score

myo Genes

myo_protein_markers = c("CNN1", "TP63","ACTA2")
top_myo  = top_correlated(dataset = acc1_cancer_cells, genes = myo_protein_markers,threshold = 0.2)
print("Number of genes = " %>% paste(length(top_myo)))
[1] "Number of genes =  473"
message("Names of genes:")
Names of genes:
top_myo %>% head(30)
 [1] "MIR429"        "EXOSC10"       "PLOD1"         "TNFRSF8"       "PDPN"          "FBLIM1"        "RP11-276H7.3" 
 [8] "HSPG2"         "EPHA8"         "IFNLR1"        "GRHL3"         "RP3-426I6.6"   "SDC3"          "RP11-490K7.1" 
[15] "RP11-73M7.1"   "TINAGL1"       "COL16A1"       "RP1-117O3.2"   "RP11-435D7.3"  "RP1-39G22.4"   "SCMH1"        
[22] "RP5-850O15.4"  "RP5-866L20.3"  "NEXN"          "RP4-714D9.2"   "RP5-837M10.3"  "RP5-964H19.1"  "CSF1"         
[29] "RP11-498A13.1" "RP11-96K19.2" 
message("Genes that also apeared in the original score:")
Genes that also apeared in the original score:
base::intersect(top_myo,original_myo_genes) 
[1] "MYLK"  "TP63"  "ACTA2" "DKK3"  "TAGLN" "CDH3" 
myo_enrich_res = genes_vec_enrichment(genes = top_myo,background = rownames(acc1_cancer_cells),homer = T,title = "myo top enrichment",custom_pathways = luminal_gs)

myo_enrich_res

Lum Genes

lum_protein_markers = c("KIT")
top_lum  = top_correlated(dataset = acc1_cancer_cells, genes = lum_protein_markers,threshold = 0.2)
Warning in cor(expression %>% t(), markers_average) :
  the standard deviation is zero
print("Number of genes = " %>% paste(length(top_lum)))
[1] "Number of genes =  1535"
message("Names of genes:")
Names of genes:
top_lum %>% head(30)
 [1] "RP11-206L10.11" "HES4"           "RP11-54O7.18"   "CPTP"           "RP3-395M20.8"   "TNFRSF14"       "RPL22"         
 [8] "ICMT"           "ERRFI1"         "RERE"           "H6PD"           "TMEM201"        "NMNAT1"         "UBE4B"         
[15] "APITD1-CORT"    "CENPS"          "RP4-635E18.7"   "FBXO2"          "FBXO44"         "UQCRHL"         "MST1P2"        
[22] "SDHB"           "PADI2"          "ARHGEF10L"      "AKR7A2"         "TMCO4"          "EIF4G3"         "USP48"         
[29] "NIPAL3"         "PIGV"          
message("Genes that also apeared in the original score:")
Genes that also apeared in the original score:
base::intersect(top_lum,original_lum_genes) 
[1] "KIT"    "EHF"    "LGALS3"
lum_enrich_res = genes_vec_enrichment(genes = top_lum,background = rownames(acc1_cancer_cells),homer = T,title = "lum top enrichment",custom_pathways = luminal_gs)

lum_enrich_res

correlated genes in original score

myo_intersected = intersect(top_myo,original_myo_genes) 
lum_intersected = intersect(top_lum,original_lum_genes) 
message("genes in myo score:")
genes in myo score:
myo_intersected
[1] "MYLK"  "TP63"  "ACTA2" "DKK3"  "TAGLN" "CDH3" 
message("genes in lum score:")
genes in lum score:
lum_intersected
[1] "KIT"    "EHF"    "LGALS3"
myoscore=apply(acc1_cancer_cells@assays[["RNA"]]@data[c("MYLK","TP63" ,"ACTA2", "DKK3", "TAGLN", "CDH3" ),],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]]@data[c("KIT" ,"EHF",  "LGALS3"),],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"enriched_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "enriched_luminal_over_myo")


data = FetchData(object = acc1_cancer_cells,vars = "enriched_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=enriched_luminal_over_myo)) + 
  geom_density() 
)


myo_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo >(2)) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo <(1)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")

enriched genes

rownames(lum_enrich_res) = lum_enrich_res$pathway_name
lum_enriched_genes = lum_enrich_res[3,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,lum_protein_markers) #add original markers
rownames(myo_enrich_res) = myo_enrich_res$pathway_name
myo_enriched_genes = myo_enrich_res[3,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,myo_protein_markers) #add original markers
message("genes in myo score:")
genes in myo score:
myo_enriched_genes
 [1] "FBLIM1"  "NEXN"    "NMNAT2"  "MYLK"    "CCDC50"  "IGFBP7"  "RAI14"   "ARAP3"   "SPARC"   "CALD1"   "LOXL2"   "COL5A1" 
[13] "ACTA2"   "DKK3"    "MSRB3"   "COL4A1"  "ACTN1"   "TPM1"    "TGFB1I1" "ADORA2B" "MXRA7"   "CNN1"    "TP63"    "ACTA2"  
message("genes in lum score:")
genes in lum score:
lum_enriched_genes
 [1] "PADI2"      "PATJ"       "PEX11B"     "APH1A"      "C1orf43"    "EFNA4"      "NECTIN4"    "SCYL3"      "ELF3"      
[10] "SOX13"      "IRF6"       "MED28"      "EPB41L4A"   "RGL2"       "C6orf132"   "TPD52L1"    "ICA1"       "MACC1"     
[19] "TRPS1"      "FAM83H"     "RASEF"      "ARRDC1"     "COMMD3"     "ANK3"       "GSTO2"      "PDCD4"      "EHF"       
[28] "ALDH3B2"    "SHANK2"     "SORL1"      "FKBP4"      "PTPN6"      "DUSP16"     "RERG"       "ADCY6"      "ERBB3"     
[37] "ERP29"      "SUSD6"      "RPS6KA5"    "SPINT1"     "FEM1B"      "TLE3"       "SCAMP2"     "CLN3"       "ADGRG1"    
[46] "ATP2C2"     "GGT6"       "MYO1D"      "ST6GALNAC2" "CYB5A"      "BLVRB"      "VRK3"       "SYCP2"      "TMPRSS2"   
[55] "LIMK2"      "KIT"       
myoscore=apply(acc1_cancer_cells@assays[["RNA"]]@data[myo_enriched_genes,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]]@data[lum_enriched_genes,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"enriched_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "enriched_luminal_over_myo")


data = FetchData(object = acc1_cancer_cells,vars = "enriched_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=enriched_luminal_over_myo)) + 
  geom_density() 
)


myo_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo >(0)) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo <(-1)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")

enriched genes and in original score

myo_enriched_genes = myo_enriched_genes[myo_enriched_genes %in% original_myo_genes]
lum_enriched_genes = lum_enriched_genes[lum_enriched_genes %in% original_lum_genes]

message("genes in myo score:")
genes in myo score:
myo_enriched_genes
[1] "MYLK"  "ACTA2" "DKK3"  "TP63"  "ACTA2"
message("genes in lum score:")
genes in lum score:
lum_enriched_genes
[1] "EHF" "KIT"
myoscore=apply(acc1_cancer_cells@assays[["RNA"]]@data[myo_enriched_genes,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]]@data[lum_enriched_genes,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"enriched_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "enriched_luminal_over_myo")


data = FetchData(object = acc1_cancer_cells,vars = "enriched_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=enriched_luminal_over_myo)) + 
  geom_density() 
)


myo_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo >(2)) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo <(2)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")

HPV

Only HMSC cancer cells:

HPV33_P3 = fread("./Data/HPV33_P3.txt",col.names = c("plate","reads")) %>% as.data.frame()
HPV33_P3.df = HPV33_P3 %>% mutate(
  plate = gsub(x =HPV33_P3$plate, replacement = "",pattern = "_.*$") 
  %>% gsub(pattern = "-P",replacement = ".P") 
  %>% gsub(pattern = "-",replacement = "_",)
  )
HPV33_P3.df = HPV33_P3.df %>% dplyr::filter(HPV33_P3.df$plate %in% colnames(acc1_cancer_cells))
rownames(HPV33_P3.df)  <- HPV33_P3.df$plate
HPV33_P3.df$plate = NULL


HPV33_P2 = fread("./Data/HPV33_P2.txt",col.names = c("plate","reads")) %>% as.data.frame()
HPV33_P2.df = HPV33_P2 %>% mutate(
  plate = gsub(x =HPV33_P2$plate, replacement = "",pattern = "_.*$") 
  %>% gsub(pattern = "plate2-",replacement = "plate2_",)
  %>% gsub(pattern = "-",replacement = "\\.",)
  )
HPV33_P2.df = HPV33_P2.df %>% dplyr::filter(HPV33_P2.df$plate %in% colnames(acc1_cancer_cells))
rownames(HPV33_P2.df)  <- HPV33_P2.df$plate
HPV33_P2.df$plate = NULL

HPV33 = rbind(HPV33_P3.df,HPV33_P2.df)
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = HPV33,col.name = "HPV33.reads")
FeaturePlot(acc1_cancer_cells,features = "HPV33.reads",max.cutoff = 40)

data = FetchData(object = acc1_cancer_cells,vars = "HPV33.reads")
print(
  data %>% 
  ggplot(aes( x=HPV33.reads)) + 
  geom_density()
)

---
title: "Title"
author: "Avishai Wizel"
date: '`r Sys.time()`'
output: 
  html_notebook: 
    code_folding: hide
---

## Data

```{r}
acc1_cancer_cells = readRDS("./Data/acc1_cancer_cells_15KnCount_V3.RDS")
all_acc_cancer_cells = readRDS("./Data/acc_cancer_cells_V3.RDS")
acc_all_cells = readRDS("./Data/acc_tpm_nCount_mito_no146_15k_with_ACC1_.RDS")
luminal_pathways = c("CHARAFE_BREAST_CANCER_LUMINAL_VS_BASAL_DN","CHARAFE_BREAST_CANCER_LUMINAL_VS_BASAL_UP","CHARAFE_BREAST_CANCER_LUMINAL_VS_MESENCHYMAL_DN","CHARAFE_BREAST_CANCER_LUMINAL_VS_MESENCHYMAL_UP","HUPER_BREAST_BASAL_VS_LUMINAL_DN","LIM_MAMMARY_LUMINAL_PROGENITOR_UP","SMID_BREAST_CANCER_LUMINAL_B_UP" )

# add luminal pathways
luminal_gs = msigdbr(species = "Homo sapiens") %>%as.data.frame() %>% dplyr::filter(gs_name %in% luminal_pathways)%>% dplyr::distinct(gs_name, gene_symbol) %>% as.data.frame()

```

## Parameters

```{r warning=FALSE}
suffix = "01_22"
data_to_read = ""
```


## functions

```{r warning=FALSE}
source_from_github(repositoy = "DEG_functions",version = "0.2.24")
source_from_github(repositoy = "HMSC_functions",version = "0.1.02",script_name = "functions.R")
```


## Enrichment analysis HMSC vs ACC
```{r fig.width=8, echo=TRUE,results='hide',fig.keep='all'}
patient.ident = all_acc_cancer_cells$patient.ident %>% as.data.frame()
patient.ident[,1] = as.character(patient.ident[,1])
patient.ident[patient.ident[,1] == "ACC1",] = "HMSC"
patient.ident[,1] = as.factor(patient.ident[,1])
all_acc_cancer_cells = AddMetaData(object = all_acc_cancer_cells,metadata = patient.ident,col.name = "patient.ident")
all_acc_cancer_cells = SetIdent(all_acc_cancer_cells, value ="patient.ident")
acc_deg <- FindMarkers(all_acc_cancer_cells, ident.1 = "HMSC",logfc.threshold = 1.5,features = VariableFeatures(all_acc_cancer_cells))
enrichment_analysis(acc_deg,background = VariableFeatures(all_acc_cancer_cells),fdr_Cutoff = 0.01,ident.1 = "HMSC",ident.2 = "ACC",show_by = 1)
```

## cell cycle filtering {.tabset}

```{r warning=FALSE}
library(GSEABase)
library(conflicted)
conflict_prefer(name = "intersect", winner = "base",quiet = T)
hallmark_name = "GO_MITOTIC_CELL_CYCLE"
genesets  =getGmt("./Data/h.all.v7.0.symbols.pluscc.gmt")
var_features=all_acc_cancer_cells@assays$RNA@var.features
geneIds= genesets[[hallmark_name]]@geneIds
score <- apply(all_acc_cancer_cells@assays$RNA@scale.data[intersect(geneIds,var_features),],2,mean)
all_acc_cancer_cells=AddMetaData(all_acc_cancer_cells,score,hallmark_name)

#filter:
all_acc_cancer_cells_ccFiltered=all_acc_cancer_cells[,all_acc_cancer_cells@meta.data[[hallmark_name]]< 0.3]


min_threshold = min(all_acc_cancer_cells$GO_MITOTIC_CELL_CYCLE)
max_threshold = max(all_acc_cancer_cells$GO_MITOTIC_CELL_CYCLE)
```


### Before cc filtering


```{r}
FeaturePlot(object = all_acc_cancer_cells,features = hallmark_name) + ggtitle("Before cc filtering")  & scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
```

### After cc filtering

```{r}
FeaturePlot(object = all_acc_cancer_cells_ccFiltered,features = hallmark_name) + ggtitle("After cc filtering") & scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
```
## {-}

## Enrichment analysis filtered HMSC vs ACC
```{r fig.width=8, echo=TRUE,results='hide',fig.keep='all'}
patient.ident = all_acc_cancer_cells_ccFiltered$patient.ident %>% as.data.frame()
patient.ident[,1] = as.character(patient.ident[,1])
patient.ident[patient.ident[,1] == "ACC1",] = "HMSC"
patient.ident[,1] = as.factor(patient.ident[,1])
all_acc_cancer_cells_ccFiltered = AddMetaData(object = all_acc_cancer_cells_ccFiltered,metadata = patient.ident,col.name = "patient.ident")
all_acc_cancer_cells_ccFiltered = SetIdent(all_acc_cancer_cells_ccFiltered, value ="patient.ident")
acc_deg <- FindMarkers(all_acc_cancer_cells_ccFiltered, ident.1 = "HMSC",logfc.threshold = 1.5,features = VariableFeatures(all_acc_cancer_cells_ccFiltered))
enrichment_analysis(acc_deg,background = VariableFeatures(all_acc_cancer_cells_ccFiltered),fdr_Cutoff = 0.01,ident.1 = "HMSC",ident.2 = "ACC",show_by = 1)
```

## MYB expression

```{r fig.width=10}
all_acc_cancer_cells = SetIdent(object = all_acc_cancer_cells,value = "patient.ident") #active snn graph
FeaturePlot(object = all_acc_cancer_cells,features = "MYB",label = T)
```

## CNV {.tabset}



```{r}
#set cell types
new.cluster.ids <- c("cancer", #0
                     "cancer", #1
                     "CAF", #2
                     "cancer", #3
                     "Endothelial", #4
                     "cancer", #5
                     "cancer", #6
                     "CAF", #7
                     "CAF", #8
                     "CAF", #9
                     "cancer", #10
                     "CAF", #11
                     "cancer", #12
                     "cancer", #13
                     "cancer", #14
                     "cancer", #15
                     "cancer", #16
                     "WBC", #17
                     "CAF" #18
                     )
```


```{r fig.show='hide'}
#rename idents:
acc_all_cells = SetIdent(object = acc_all_cells,value = "RNA_snn_res.1") #active snn graph
names(new.cluster.ids) <- levels(acc_all_cells) #add snn graph levels to new.cluster.ids
acc_all_cells@meta.data[["seurat_clusters"]] = acc_all_cells@meta.data[["RNA_snn_res.1"]]
acc_all_cells = SetIdent(object = acc_all_cells,value = "seurat_clusters")
acc_all_cells <- RenameIdents(acc_all_cells, new.cluster.ids) 

# divide "cancer" into patients:
cell_types = acc_all_cells@active.ident %>% as.data.frame()
cell_types[,1]<- as.character(cell_types[,1])
cell_types = cbind(cell_types,acc_all_cells$patient.ident) %>% setnames(old = names(.), 
         new = c('cell_type','patient'))
cell_types[cell_types$cell_type == "cancer",] = cell_types[cell_types$cell_type == "cancer",2]


# hmsc_rows = (startsWith(x = rownames(cell_types),prefix = "ACC.plate2") | startsWith(x = rownames(cell_types),prefix = "ACC1.")) & cell_types[,1] == "cancer" 
# acc_rows = !(startsWith(x = rownames(cell_types),prefix = "ACC.plate2") | startsWith(x = rownames(cell_types),prefix = "ACC1.")) & cell_types[,1] == "cancer" 
# cell_types[,1][hmsc_rows]  = "HMSC"
# cell_types[,1][acc_rows]  = "ACC"

#add to metadata:
cell_types[,2] = NULL 
cell_types[cell_types$cell_type == "ACC1",] = "HMSC"
acc_all_cells = AddMetaData(object =acc_all_cells ,metadata = cell_types,col.name = "cell.type")
```
### CNV UMAP 

```{r fig.width=10}
library(infercnv)
library(tidyverse)
acc_annotation  = as.data.frame(acc_all_cells@meta.data[,"cell.type",drop = F])
acc_annotation = acc_annotation %>% rownames_to_column("orig.ident") 
acc_annotation = acc_annotation %>% mutate(orig.ident = gsub(x = acc_annotation$orig.ident,pattern = "\\.", replacement = "-") %>% 
  gsub(pattern = "_", replacement = "-"))
  

write.table(acc_annotation, "./Data/inferCNV/acc_annotation.txt", append = FALSE, 
            sep = "\t", dec = ".",row.names = FALSE, col.names = F)

infercnv_obj = CreateInfercnvObject(raw_counts_matrix="./Data/inferCNV/all.4icnv.txt", 
                                    annotations_file="./Data/inferCNV/acc_annotation.txt",
                                    delim="\t",gene_order_file="./Data/inferCNV/gencode_v19_gene_pos.txt"
                                    ,ref_group_names=c("CAF", "Endothelial", "WBC")) #groups of normal cells

infercnv_obj_default = infercnv::run(infercnv_obj, cutoff=1, out_dir='./Data/inferCNV/infercnv_output',
                                     cluster_by_groups=T, plot_steps=FALSE,
                                     denoise=TRUE, HMM=FALSE, no_prelim_plot=TRUE,
                                     png_res=300)
plot_cnv(infercnv_obj_default, output_format = "png",  write_expr_matrix = FALSE,out_dir = "./Data/inferCNV/",png_res	=800,obs_title = "Malignant cells",ref_title = "Normal cells")


cluster.info=FetchData(acc_all_cells,c("ident","orig.ident","UMAP_1","UMAP_2","nCount_RNA","nFeature_RNA","percent.mt","patient.ident","seurat_clusters"))
cluster.info$cell=rownames(cluster.info)

library(limma)
smoothed=apply(infercnv_obj_default@expr.data,2,tricubeMovingAverage, span=0.01)
cnsig=sqrt(apply((smoothed-1)^2,2,mean))
umap=cluster.info
names(cnsig)=umap$cell

acc_all_cells <- AddMetaData(object = acc_all_cells, metadata = cnsig, col.name = "copynumber")
acc_all_cells = SetIdent(object = acc_all_cells,value = "cell.type")
FeaturePlot(acc_all_cells, "copynumber",pt.size = 1, cols = c("lightblue","orange","red","darkred"),label = T,repel = T)
```


### CNV plot 

![CNV plot](/sci/labs/yotamd/lab_share/avishai.wizel/R_projects/HMSC/Data/inferCNV/infercnv.png)
## {-}

## Original score
```{r}
original_myo_genes = c( "TP63", "TP73", "CAV1", "CDH3", "KRT5", "KRT14", "ACTA2", "TAGLN", "MYLK", "DKK3")
original_lum_genes = c("KIT", "EHF", "ELF5", "KRT7", "CLDN3", "CLDN4", "CD24", "LGALS3", "LCN2", "SLPI" )
```

```{r}
myoscore=apply(all_acc_cancer_cells@assays[["RNA"]][original_myo_genes,],2,mean)
lescore=apply(all_acc_cancer_cells@assays[["RNA"]][original_lum_genes,],2,mean)
all_acc_cancer_cells=AddMetaData(all_acc_cancer_cells,lescore-myoscore,"luminal_over_myo")
FeaturePlot(object = all_acc_cancer_cells,features = "luminal_over_myo")
data = FetchData(object = all_acc_cancer_cells,vars = "luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=luminal_over_myo)) + 
  geom_density() 
)
```
```{r}
only_acc_cancer_cells = subset(x = all_acc_cancer_cells,patient.ident !="HMSC")
myo_cells_num = subset(x = only_acc_cancer_cells,luminal_over_myo >1) %>% ncol() /ncol(all_acc_cancer_cells)
lum_cells_num = subset(x = only_acc_cancer_cells,luminal_over_myo <(-1)) %>% ncol()/ncol(all_acc_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")
```

```{r}
myoscore=apply(acc1_cancer_cells@assays[["RNA"]][original_myo_genes,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]][original_lum_genes,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "luminal_over_myo")
data = FetchData(object = acc1_cancer_cells,vars = "luminal_over_myo")
print(
    data %>% 
    ggplot(aes( x=luminal_over_myo)) + 
    geom_density() 
  )
```
```{r}
myo_cells_num = subset(x = acc1_cancer_cells,luminal_over_myo >0) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,luminal_over_myo <(0)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")
```
## 0.35 Most correlated score {.tabset}

### Myo genes


```{r warning=FALSE, collapse=T}
myo_protein_markers = c("CNN1", "TP63","ACTA2")
# undebug(top_correlated)
top_myo  = top_correlated(dataset = acc1_cancer_cells, genes = myo_protein_markers,threshold = 0.35)
print("Number of genes = " %>% paste(length(top_myo)))
message("Names of genes:")
top_myo %>% head(30)
message("Genes that also apeared in the original score:")
base::intersect(top_myo,original_myo_genes) 
```
```{r}
myo_enrich_res = genes_vec_enrichment(genes = top_myo,background = rownames(acc1_cancer_cells),homer = T,title = "myo top enrichment",custom_pathways = luminal_gs)
myo_enrich_res
```
### Lum genes
```{r}
lum_protein_markers = c("KIT")
top_lum  = top_correlated(dataset = acc1_cancer_cells, genes = lum_protein_markers,threshold = 0.35)
print("Number of genes = " %>% paste(length(top_lum)))
message("Names of genes:")
top_lum %>% head(30)
message("Genes that also apeared in the original score:")
base::intersect(top_lum,original_lum_genes) 
```

```{r}
lum_enrich_res = genes_vec_enrichment(genes = top_lum,background = rownames(acc1_cancer_cells),homer = T,title = "lum top enrichment",custom_pathways = luminal_gs)
lum_enrich_res
```
### top correlated score
```{r}
myoscore=apply(acc1_cancer_cells@assays[["RNA"]][top_myo,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]][top_lum,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"top_cor_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "top_cor_luminal_over_myo")

data = FetchData(object = acc1_cancer_cells,vars = "top_cor_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=top_cor_luminal_over_myo)) + 
  geom_density() 
)
  myo_cells_num = subset(x = acc1_cancer_cells,top_cor_luminal_over_myo >0) %>% ncol() /ncol(all_acc_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,top_cor_luminal_over_myo <(0)) %>% ncol()/ncol(all_acc_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")

```


###  enriched genes score
```{r}
rownames(lum_enrich_res) = lum_enrich_res$pathway_name
lum_enriched_genes = lum_enrich_res[1,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,lum_protein_markers) #add original markers
```

```{r}
rownames(myo_enrich_res) = myo_enrich_res$pathway_name
myo_enriched_genes = myo_enrich_res[1,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,myo_protein_markers) #add original markers
```

```{r}
myoscore=apply(acc1_cancer_cells@assays[["RNA"]]@data[myo_enriched_genes,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]]@data[lum_enriched_genes,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"enriched_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "enriched_luminal_over_myo")

data = FetchData(object = acc1_cancer_cells,vars = "enriched_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=enriched_luminal_over_myo)) + 
  geom_density() 
)

myo_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo >(-2.5)) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo <(-2.5)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")
```


## {-}


##  0.2 Most correlated score {.tabset}

###  myo Genes


```{r warning=FALSE}
myo_protein_markers = c("CNN1", "TP63","ACTA2")
top_myo  = top_correlated(dataset = acc1_cancer_cells, genes = myo_protein_markers,threshold = 0.2)
print("Number of genes = " %>% paste(length(top_myo)))
message("Names of genes:")
top_myo %>% head(30)
message("Genes that also apeared in the original score:")
base::intersect(top_myo,original_myo_genes) 
```

```{r}
myo_enrich_res = genes_vec_enrichment(genes = top_myo,background = rownames(acc1_cancer_cells),homer = T,title = "myo top enrichment",custom_pathways = luminal_gs)
myo_enrich_res
```
###  Lum Genes

```{r}
lum_protein_markers = c("KIT")
top_lum  = top_correlated(dataset = acc1_cancer_cells, genes = lum_protein_markers,threshold = 0.2)
print("Number of genes = " %>% paste(length(top_lum)))
message("Names of genes:")
top_lum %>% head(30)
message("Genes that also apeared in the original score:")
base::intersect(top_lum,original_lum_genes) 
```

```{r}
lum_enrich_res = genes_vec_enrichment(genes = top_lum,background = rownames(acc1_cancer_cells),homer = T,title = "lum top enrichment",custom_pathways = luminal_gs)
lum_enrich_res
```
###  correlated genes in original score
```{r}
myo_intersected = intersect(top_myo,original_myo_genes) 
lum_intersected = intersect(top_lum,original_lum_genes) 
message("genes in myo score:")
myo_intersected

message("genes in lum score:")
lum_intersected


myoscore=apply(acc1_cancer_cells@assays[["RNA"]]@data[c("MYLK","TP63" ,"ACTA2", "DKK3", "TAGLN", "CDH3" ),],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]]@data[c("KIT" ,"EHF",  "LGALS3"),],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"enriched_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "enriched_luminal_over_myo")

data = FetchData(object = acc1_cancer_cells,vars = "enriched_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=enriched_luminal_over_myo)) + 
  geom_density() 
)

myo_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo >(2)) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo <(1)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")
```

### enriched genes
```{r}
rownames(lum_enrich_res) = lum_enrich_res$pathway_name
lum_enriched_genes = lum_enrich_res[3,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,lum_protein_markers) #add original markers
```

```{r}
rownames(myo_enrich_res) = myo_enrich_res$pathway_name
myo_enriched_genes = myo_enrich_res[3,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,myo_protein_markers) #add original markers
```

```{r}
message("genes in myo score:")
myo_enriched_genes

message("genes in lum score:")
lum_enriched_genes

myoscore=apply(acc1_cancer_cells@assays[["RNA"]]@data[myo_enriched_genes,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]]@data[lum_enriched_genes,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"enriched_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "enriched_luminal_over_myo")

data = FetchData(object = acc1_cancer_cells,vars = "enriched_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=enriched_luminal_over_myo)) + 
  geom_density() 
)

myo_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo >(0)) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo <(-1)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")
```




### enriched genes and in original score
```{r}
myo_enriched_genes = myo_enriched_genes[myo_enriched_genes %in% original_myo_genes]
lum_enriched_genes = lum_enriched_genes[lum_enriched_genes %in% original_lum_genes]

```

```{r}

message("genes in myo score:")
myo_enriched_genes

message("genes in lum score:")
lum_enriched_genes
myoscore=apply(acc1_cancer_cells@assays[["RNA"]]@data[myo_enriched_genes,],2,mean)
lescore=apply(acc1_cancer_cells@assays[["RNA"]]@data[lum_enriched_genes,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,lescore-myoscore,"enriched_luminal_over_myo")
FeaturePlot(object = acc1_cancer_cells,features = "enriched_luminal_over_myo")

data = FetchData(object = acc1_cancer_cells,vars = "enriched_luminal_over_myo")
print(
  data %>% 
  ggplot(aes( x=enriched_luminal_over_myo)) + 
  geom_density() 
)

myo_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo >(2)) %>% ncol() /ncol(acc1_cancer_cells)
lum_cells_num = subset(x = acc1_cancer_cells,enriched_luminal_over_myo <(2)) %>% ncol()/ncol(acc1_cancer_cells)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
  geom_bar(stat="identity") + ggtitle("ACC cell types")
```
```{r}

```

## {-}


# HPV

Only HMSC cancer cells:

```{r}
HPV33_P3 = fread("./Data/HPV33_P3.txt",col.names = c("plate","reads")) %>% as.data.frame()
HPV33_P3.df = HPV33_P3 %>% mutate(
  plate = gsub(x =HPV33_P3$plate, replacement = "",pattern = "_.*$") 
  %>% gsub(pattern = "-P",replacement = ".P") 
  %>% gsub(pattern = "-",replacement = "_",)
  )
HPV33_P3.df = HPV33_P3.df %>% dplyr::filter(HPV33_P3.df$plate %in% colnames(acc1_cancer_cells))
rownames(HPV33_P3.df)  <- HPV33_P3.df$plate
HPV33_P3.df$plate = NULL


HPV33_P2 = fread("./Data/HPV33_P2.txt",col.names = c("plate","reads")) %>% as.data.frame()
HPV33_P2.df = HPV33_P2 %>% mutate(
  plate = gsub(x =HPV33_P2$plate, replacement = "",pattern = "_.*$") 
  %>% gsub(pattern = "plate2-",replacement = "plate2_",)
  %>% gsub(pattern = "-",replacement = "\\.",)
  )
HPV33_P2.df = HPV33_P2.df %>% dplyr::filter(HPV33_P2.df$plate %in% colnames(acc1_cancer_cells))
rownames(HPV33_P2.df)  <- HPV33_P2.df$plate
HPV33_P2.df$plate = NULL

HPV33 = rbind(HPV33_P3.df,HPV33_P2.df)
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = HPV33,col.name = "HPV33.reads")
FeaturePlot(acc1_cancer_cells,features = "HPV33.reads",max.cutoff = 40)
```
```{r}
data = FetchData(object = acc1_cancer_cells,vars = "HPV33.reads")
print(
  data %>% 
  ggplot(aes( x=HPV33.reads)) + 
  geom_density()
)
```

